## DiD analysis for local electoral results 
## Requires a single data file: dataset-mcmv-by-localelectoral-period.RData, 
## Data file was assembled in Assemble_ElectoralPeriod_Local.R
## Assumes data file is in a subfolder "Data"
## Saves tables in .tex format and figures in .pdf to a subfolder "Figures"

## Code consists of the following parts:
## 1) minor recoding of variables for analysis
## 2) analysis by cycle (two cycles)
## 3) regressions on combined (stacked) data from all cycles
## 4) assemble results (table and descriptives) reported in main body of the paper
## 5) assemble tables and descriptives for appendices

rm(list=ls())
library(estimatr)
library(plm)
require(lmtest)
library(texreg)
library(MatchIt)
library(xtable)
#library(remotes)
#remotes::install_github("pedrohcgs/DRDID")
library(DRDID)

## Load Data assembled in Assemble_ElectoralPeriod.R
load("Data/dataset-mcmv-by-localelectoral-period.RData")
cat(comment(d))

## (1) Recoding some variables and examining descriptives #####

d$N.pc <- d$N/(d$pop/100) #benefic. per 100 people
d$N.hd <- d$N/(d$hdtot.2010) #beneficiaries relative to housing deficit
d$N.bin <- d$N>0 # Create dichotomous MCMV indicator (main treatment variable)
table(Period=d$elec.period,d$N.bin)
d$inc.vs0 <- ifelse(is.na(d$inc.vs),0,d$inc.vs) # Make no incumbent cases vs = 0


## (2) Cycle by cycle analysis ######

## (2a) 2008-2012 electoral cycle 
d1 <- subset(d,elec.period==0|elec.period==1)
  ## Prepare the data for estimation
  d1$MCMV <- is.element(d1$Cod_IBGE,d1$Cod_IBGE[which(d1$N.bin==T)])
  d1$POST <- as.numeric(d1$elec.period==1)
  #Repeat the "treatment" value in the pre-treatment period
  tmp <- subset(d1,select=c(Cod_IBGE,N.pc,N.hd),elec.period==1)
  d1 <- merge(d1,tmp,by="Cod_IBGE",suffixes=c("",".post"),all.x=T)
  
  ## Actual regressions
  reg1.1 <- lm_robust(inc.vs0~POST*MCMV,data=d1, clusters = Cod_IBGE)
  reg1.2 <- lm_robust(inc.vs0~POST*N.pc.post,data=d1, clusters = Cod_IBGE)
  reg1.3 <- lm_robust(inc.vs0~POST*N.hd.post,data=d1, clusters = Cod_IBGE)

  #Match untreated municipalities at the start of cycle 1 (elec.period=0)
  to.match <- na.omit(subset(d1,elec.period==0
                           ,select=c(Cod_IBGE,MCMV,pib_pc,pop,hdtotrel.2010)))
  m1 <- matchit(MCMV~pib_pc+pop+hdtotrel.2010+I(hdtotrel.2010^2)+I(log(pib_pc))
              ,replace=T,data=to.match)
  m1 <- subset(get_matches(m1),select=c(Cod_IBGE,id,weights))
  dm1 <- merge(m1,d1,by="Cod_IBGE",all.x=T)
  regm1.1 <- lm_robust(inc.vs0~POST*MCMV,data=dm1, clusters = Cod_IBGE, weights=weights)
  regm1.2 <- lm_robust(inc.vs0~POST*N.pc.post,data=dm1, clusters = Cod_IBGE, weights=weights)
  regm1.3 <- lm_robust(inc.vs0~POST*N.hd.post,data=dm1, clusters = Cod_IBGE, weights=weights)

  #Conditioning on observables by improved locally efficient DR DID:
  #Need to eliminate NAs and use baseline covariates only
  d1_did <- d1
  d1_did$MCMVn <- as.numeric(d1_did$MCMV)
  d1_did$Cod_IBGEn <- as.numeric(paste(d1_did$Cod_IBGE,"1",sep=""))
  #Generate gdp_pc and pop at baseline (t0) 
  t0 <- subset(d1_did,POST==0,select=c(Cod_IBGE,pop,pib_pc))
  names(t0)[-1] <- paste(names(t0)[-1],"t0",sep="_")
  d1_did <- merge(d1_did,t0,by="Cod_IBGE",all.x=T)
  rm(t0)
  # Get rid of municipalities with missing covariate data
  to.drop <- unique(d1_did$Cod_IBGE[which(is.na(d1_did$pop_t0)|
                                          is.na(d1_did$pib_pc_t0)|
                                          is.na(d1_did$hdtot.2010)|
                                          is.na(d1_did$mr.2010))])
  d1_did <- d1_did[-which(is.element(d1_did$Cod_IBGE,to.drop)),]
  cat("Keeping",length(unique(d1_did$Cod_IBGE)),"out of",length(unique(d1$Cod_IBGE))
    ,"municipalities in the drdid estimation of cycle 1.\n")
  regdid1.1 <- drdid(yname = "inc.vs0"
                   , tname = "POST"
                   , idname = "Cod_IBGEn"
                   , dname = "MCMVn"
                   , xformla= ~ pop_t0+pib_pc_t0+hdtotrel.2010+I(hdtotrel.2010^2)+log(pib_pc_t0)+mr.2010
                   , data = d1_did, panel = TRUE)

  #Robsutness: keep municipalities for which the mayor was not term limited in the end period
  could.run <- d1$Cod_IBGE[which(d1$term.limited&d1$POST==T)]
  d1ntl <- subset(d1,is.element(Cod_IBGE,could.run))
  reg1.1ntl <- lm_robust(inc.vs0~POST*MCMV,data= d1ntl, clusters = Cod_IBGE)#not term limited
  
# (2b) 2012-2016 electoral cycle 
to.keep <- d$Cod_IBGE[which(d$elec.period==1&d$N.bin==F)]#places without MCMV in ep=1
d2 <- subset(d,is.element(Cod_IBGE,to.keep)&(elec.period==1|elec.period==2))
  ## Prepare the data for estimation
  d2$MCMV <- is.element(d2$Cod_IBGE,d2$Cod_IBGE[which(d2$N.bin==T)])
  d2$POST <- as.numeric(d2$elec.period==2)
  #Repeat the "treatment" value in the pre-treatment period
  tmp <- subset(d2,select=c(Cod_IBGE,N.pc,N.hd),elec.period==2)
  d2 <- merge(d2,tmp,by="Cod_IBGE",suffixes=c("",".post"),all.x=T)

  #Actual regressions
  reg2.1 <- lm_robust(inc.vs0~POST*MCMV,data=d2, clusters = Cod_IBGE)
  reg2.2 <- lm_robust(inc.vs0~POST*N.pc.post,data=d2, clusters = Cod_IBGE)
  reg2.3 <- lm_robust(inc.vs0~POST*N.hd.post,data=d2, clusters = Cod_IBGE)

  #Match untreated municipalities at start of cycle 2 (elec.period=1)
  to.match <- na.omit(subset(d2,elec.period==1
                           ,select=c(Cod_IBGE,MCMV,pib_pc,pop,hdtotrel.2010)))
  m2 <- matchit(MCMV~pib_pc+pop+hdtotrel.2010+I(hdtotrel.2010^2)+I(log(pib_pc))
              ,replace=T,data=to.match)
  m2 <- subset(get_matches(m2),select=c(Cod_IBGE,id,weights))
  dm2 <- merge(m2,d2,by="Cod_IBGE",all.x=T)
  regm2.1 <- lm_robust(inc.vs0~POST*MCMV,data=dm2, clusters = Cod_IBGE, weights=weights)
  regm2.2 <- lm_robust(inc.vs0~POST*N.pc.post,data=dm2, clusters = Cod_IBGE, weights=weights)
  regm2.3 <- lm_robust(inc.vs0~POST*N.hd.post,data=dm2, clusters = Cod_IBGE, weights=weights)

  #Conditioning on observables by improved locally efficient DR DID:
  #Need to eliminate NAs and use baseline covariates only
  d2_did <- subset(d2, elec.period==1|elec.period==2)
  d2_did$MCMVn <- as.numeric(d2_did$MCMV)
  d2_did$Cod_IBGEn <- as.numeric(paste(d2_did$Cod_IBGE,"2",sep=""))
  #Generate gdp_pc and pop at baseline (t0) 
  t0 <- subset(d2_did,POST==0,select=c(Cod_IBGE,pop,pib_pc))
  names(t0)[-1] <- paste(names(t0)[-1],"t0",sep="_")
  d2_did <- merge(d2_did,t0,by="Cod_IBGE",all.x=T)
  rm(t0)
  # Get rid of municipalities with missing covariate data
  to.drop <- unique(d2_did$Cod_IBGE[which(is.na(d2_did$pop_t0)|
                                          is.na(d2_did$pib_pc_t0)|
                                          is.na(d2_did$hdtot.2010)|
                                          is.na(d2_did$mr.2010))])
  d2_did <- d2_did[-which(is.element(d2_did$Cod_IBGE,to.drop)),]
  cat("Keeping",length(unique(d2_did$Cod_IBGE)),"out of",length(unique(d2$Cod_IBGE))
    ,"municipalities in the drdid estimation of cycle 2.\n")
  regdid2.1 <- drdid(yname = "inc.vs0"
                   , tname = "POST"
                   , idname = "Cod_IBGEn"
                   , dname = "MCMVn"
                   , xformla= ~ pop_t0+pib_pc_t0+hdtotrel.2010+I(hdtotrel.2010^2)+log(pib_pc_t0)+mr.2010
                   , data = d2_did, panel = TRUE)

  #Keep municipalities for which the mayor was not term limited in the end period
  could.run <- d2$Cod_IBGE[which(d2$term.limited==F&d2$POST==T)]
  d2ntl <- subset(d2,is.element(Cod_IBGE,could.run))
  reg2.1ntl <- lm_robust(inc.vs0~POST*MCMV,data=d2ntl, clusters = Cod_IBGE)#not term limited


## (3) Stack the three cycles for "combined" analyses ###
  dpool <- rbind(d1,d2)  
  regp.1 <- lm_robust(inc.vs~POST*MCMV,data=dpool, clusters = Cod_IBGE,fixed_effects=~elec.period)
  regp.2 <- lm_robust(inc.vs~POST*N.pc.post,data=dpool, clusters = Cod_IBGE,fixed_effects=~elec.period)
  regp.3 <- lm_robust(inc.vs~POST*N.hd.post,data=dpool, clusters = Cod_IBGE,fixed_effects=~elec.period)
  
  dmpool <- rbind(dm1,dm2)
  # Can't use weights and fixed effects in lm_robust
  # But assembling the matched data set via get_matches() renders weights irrelevant
  regmp.1 <- lm_robust(inc.vs~POST*MCMV,data=dmpool, clusters = Cod_IBGE,fixed_effects=~elec.period)
  regmp.2 <- lm_robust(inc.vs~POST*N.pc.post,data=dmpool, clusters = Cod_IBGE,fixed_effects=~elec.period)
  regmp.3 <- lm_robust(inc.vs~POST*N.hd.post,data=dmpool, clusters = Cod_IBGE,fixed_effects=~elec.period)
  
  dntlpool <- rbind(d1ntl,d2ntl)
  regntlp.1 <- lm_robust(inc.vs~POST*MCMV,data=dntlpool, clusters = Cod_IBGE,fixed_effects=~elec.period)
  

# (4) Assemble results in main body of the paper

tab.local <- texreg(list(b2008.2012=reg1.1,b2012.2016=reg2.1,bpool=regp.1,bmatched=regmp.1)
                   , include.ci = FALSE
                   , caption = "Local Elections"
                   , caption.above = T
                   , custom.coef.map = list("POST:MCMVTRUE"="MCMV$times$Post")
                   , digits = 3
                   , stars = c(0.01, 0.05, 0.1)
                   , label = "tab:did:mayor"
                   , file = "Tables/tab-2b.tex"
)


## (5) Results for apendices 

## (5a) Appendix K1: Full set of estimates on raw DiD sample (Table K2)
tab.local.k2 <- texreg(list(b2008.2012=reg1.1,b2012.2016=reg2.1,bpool=regp.1)
                      , include.ci = FALSE
                      , caption = "Local Elections"
                      , caption.above = T
                      , digits = 3
                      , stars = c(0.01, 0.05, 0.1)
                      , label = "tab:did:mun:full"
                      , file = "Tables/tab-K2.tex"
)

## (5b) Appendix K2: Alternative specifications of MCMV (continuous treatment)
tab.local.k3 <- texreg(list(pc2008.2012=reg1.2,pc2012.2016=reg2.2,pcpool=regp.2,
                           hd2008.2012=reg1.3,hd2012.2016=reg2.3,pcpool=regp.3)
                      , include.ci = FALSE
                      , custom.coef.map = list("POST:N.pc.post"="POST:MCMV","POST:N.hd.post"="POST:MCMV")
                      , caption = "Local Elections"
                      , caption.above = T
                      , digits = 3
                      , stars = c(0.01, 0.05, 0.1)
                      , label = "tab:did:mayorcont"
                      , file = "Tables/tab-K3b.tex")

## (5c) Appendix K3: Conditioning on pre-treatment covariates via matching (Tables K4)
tab.pres.k4 <- texreg(list(m2008.2012=regm1.1,m2012.2016=regm2.1,mpool=regmp.1)
                      , include.ci = FALSE
                      , caption = "Estimates"
                      , caption.above = T
                      , custom.coef.map = list("POST:MCMVTRUE"="POST:MCMV")
                      , digits = 3
                      , stars = c(0.01, 0.05, 0.1)
                      , label = "tab:did:presmatch"
                      , file = "Tables/tab-K4b.tex")

## (5d) Appendix K3: Conditioning on pre-treatment covariates via DRDID (Table  K5)
## RDDID package generates a strange output, so build table by hand
att = c(regdid1.1$ATT,regdid2.1$ATT)
se = c(regdid1.1$se,regdid2.1$se)
n = c(nrow(d1_did),nrow(d2_did))
pvals = 1/2*pt(att/se, df=n-2, lower.tail = FALSE)
stars <- ifelse(pvals<0.01,"***",
                ifelse(pvals>=0.01&pvals<0.05,"**",
                       ifelse(pvals>=0.05&pvals<0.1,"*","")))
the.tab <- data.frame(rbind(att,se,n))
names(the.tab) <- c("d2008.2012","d2012.2016")
tab.local.k5 <- xtable(the.tab,digits=3)
caption(tab.local.k5) <- "Doubly-robust DiD Estimates, Conditioning on Covariates"
label(tab.local.k5) <- "tab:drdid:mayor" 
tab.local.k5[1,] <- paste("$",sprintf("%01.3f", att),"^{",stars,"}","$",sep="")## replace 1st row 
tab.local.k5[2,] <- paste("$(",sprintf("%01.3f", se),")$",sep="")## replace 2nd row 
print(tab.local.k5,digits=3,caption.placement= "top",
      file="Tables/tab-K5b.tex")


# (5d) Appendix K.5 Local elections in which incumbents could run Table K7

  # Share of municipalities with the incumbent mayor running
  tmp0 <- prop.table(table(d$term.limited==F,d$elec.period),2) #could run
  tmp1 <- prop.table(table(d$is.inc,d$elec.period),2) #actually ran
  # Share of municipalities with an identified incumbent supported candidate by cycle
  tmp2 <- prop.table(table(is.na(d$inc.vs)==F,d$elec.period),2)
  
tmp <- data.frame(Could.Run=tmp0[2,],Mayor.Ran=tmp1[2,],Incumb.Cand=tmp2[2,])
rownames(tmp) <- c("2004--2008","2008--2012","2012--2016","2016--2020")
tmp <- xtable(tmp*100,digits=2)
caption(tmp) <-   "Data on Mayoral Incumbent Candidates"
label(tmp) <- "tab:incumbencystatus"
print(tmp
      ,caption.placement = "top"
      ,file = "Tables/tab-K7.tex")

# (5e) Appendix K5 Table K8: Local elections, non term-limited mayors
tab.local.k8 <- texreg(list(b2006.2010=reg1.1ntl,b2010.2014=reg2.1ntl,combined=regntlp.1)
                       , include.ci = FALSE
                       , caption = "Local Elections"
                       , caption.above = T
                       , digits = 3
                       , stars = c(0.01, 0.05, 0.1)
                       , label = "tab:did:mun:full"
                       , file = "Tables/tab-K8.tex"
)
